1

# install.packages("tidyverse")
# install.packages("data.table")
library(tidyverse)
library(data.table)
# Read the two CSV exports from the working directory into data.tables.
df2022 <- fread(file = "ad_viz_plotval_data1.csv")
df2002 <- fread(file = "ad_viz_plotval_data2.csv")
c(nrow(df2022), ncol(df2022)) # Row and column counts
[1] 59756    22
c(nrow(df2002), ncol(df2002)) # Row and column counts
[1] 15976    22
colnames(df2022) # List the variable names
 [1] "Date"                           "Source"                        
 [3] "Site ID"                        "POC"                           
 [5] "Daily Mean PM2.5 Concentration" "Units"                         
 [7] "Daily AQI Value"                "Local Site Name"               
 [9] "Daily Obs Count"                "Percent Complete"              
[11] "AQS Parameter Code"             "AQS Parameter Description"     
[13] "Method Code"                    "Method Description"            
[15] "CBSA Code"                      "CBSA Name"                     
[17] "State FIPS Code"                "State"                         
[19] "County FIPS Code"               "County"                        
[21] "Site Latitude"                  "Site Longitude"                
colnames(df2002) # List the variable names
 [1] "Date"                           "Source"                        
 [3] "Site ID"                        "POC"                           
 [5] "Daily Mean PM2.5 Concentration" "Units"                         
 [7] "Daily AQI Value"                "Local Site Name"               
 [9] "Daily Obs Count"                "Percent Complete"              
[11] "AQS Parameter Code"             "AQS Parameter Description"     
[13] "Method Code"                    "Method Description"            
[15] "CBSA Code"                      "CBSA Name"                     
[17] "State FIPS Code"                "State"                         
[19] "County FIPS Code"               "County"                        
[21] "Site Latitude"                  "Site Longitude"                
# Check the variable types column by column.
# apply() converts the whole table to a matrix first; with mixed column types
# that matrix is character, so every column was reported as "character".
# vapply() inspects each column as it is actually stored.
# NOTE(review): the all-"character" listing recorded below came from the
# earlier apply() call; re-run this line to refresh it.
vapply(df2022, function(col) paste(class(col), collapse = "/"), character(1))
                          Date                         Source 
                   "character"                    "character" 
                       Site ID                            POC 
                   "character"                    "character" 
Daily Mean PM2.5 Concentration                          Units 
                   "character"                    "character" 
               Daily AQI Value                Local Site Name 
                   "character"                    "character" 
               Daily Obs Count               Percent Complete 
                   "character"                    "character" 
            AQS Parameter Code      AQS Parameter Description 
                   "character"                    "character" 
                   Method Code             Method Description 
                   "character"                    "character" 
                     CBSA Code                      CBSA Name 
                   "character"                    "character" 
               State FIPS Code                          State 
                   "character"                    "character" 
              County FIPS Code                         County 
                   "character"                    "character" 
                 Site Latitude                 Site Longitude 
                   "character"                    "character" 
# Check the variable types column by column.
# apply() converts the whole table to a matrix first; with mixed column types
# that matrix is character, so every column was reported as "character".
# vapply() inspects each column as it is actually stored.
# NOTE(review): the all-"character" listing recorded below came from the
# earlier apply() call; re-run this line to refresh it.
vapply(df2002, function(col) paste(class(col), collapse = "/"), character(1))
                          Date                         Source 
                   "character"                    "character" 
                       Site ID                            POC 
                   "character"                    "character" 
Daily Mean PM2.5 Concentration                          Units 
                   "character"                    "character" 
               Daily AQI Value                Local Site Name 
                   "character"                    "character" 
               Daily Obs Count               Percent Complete 
                   "character"                    "character" 
            AQS Parameter Code      AQS Parameter Description 
                   "character"                    "character" 
                   Method Code             Method Description 
                   "character"                    "character" 
                     CBSA Code                      CBSA Name 
                   "character"                    "character" 
               State FIPS Code                          State 
                   "character"                    "character" 
              County FIPS Code                         County 
                   "character"                    "character" 
                 Site Latitude                 Site Longitude 
                   "character"                    "character" 

2

# Stack the two yearly tables into a single table.
newdat <- rbind(df2002, df2022)

# Parse the month/day/year dates, then derive the calendar year.
# data.table is attached after tidyverse, so the unqualified year() already
# resolved to data.table::year(); spelling out the namespaces makes the
# masking explicit and keeps Year an integer.
newdat$Date <- lubridate::mdy(newdat$Date)
newdat$Year <- data.table::year(newdat$Date)

# Rename by column name instead of by position, so a change in column order
# cannot silently relabel the wrong columns; setnames() stops with an error
# if one of the old names is missing.
setnames(
  newdat,
  old = c("Site ID", "Site Latitude", "Site Longitude"),
  new = c("SiteID", "Latitude", "Longitude")
)

3

# install.packages("leaflet")
library(leaflet)
# Colour code the year: 2002 in red, everything else in blue.
# Year is stored as a number, so compare it with a number rather than the
# string "2002" (which only worked through implicit coercion).
newdat$color <- ifelse(newdat$Year == 2002L, "red", "blue")

# Create a basic map.
# Rows that repeat the same site, coordinates and year would draw identical
# markers on top of each other, so only the distinct combinations are plotted.
newdat %>%
  distinct(SiteID, Latitude, Longitude, Year, color) %>%
  leaflet() %>%
  addTiles() %>% # Add OpenStreetMap tiles
  addCircleMarkers(
    lng = ~Longitude,
    lat = ~Latitude,
    color = ~color,
    label = ~SiteID
  )

4

# Number of missing daily PM2.5 values
sum(is.na(newdat[["Daily Mean PM2.5 Concentration"]]))
[1] 0
# Frequency of each distinct PM2.5 value and its share of all rows
newdat %>%
  group_by(`Daily Mean PM2.5 Concentration`) %>%
  tally(name = "Count") %>%
  mutate(porp = prop.table(Count))
# A tibble: 833 × 3
   `Daily Mean PM2.5 Concentration` Count      porp
                              <dbl> <int>     <dbl>
 1                             -6.7     1 0.0000132
 2                             -6.3     1 0.0000132
 3                             -5.1     1 0.0000132
 4                             -4.7     2 0.0000264
 5                             -4.1     1 0.0000132
 6                             -3.1     1 0.0000132
 7                             -3       1 0.0000132
 8                             -2.2     2 0.0000264
 9                             -2.1     1 0.0000132
10                             -2       1 0.0000132
# ℹ 823 more rows
# Mean and standard deviation of daily PM2.5 for each year
newdat %>%
  group_by(Year) %>%
  summarise(
    across(
      `Daily Mean PM2.5 Concentration`,
      list(MeanPM25 = mean, SdPM25 = sd),
      .names = "{.fn}"
    )
  )
# A tibble: 2 × 3
   Year MeanPM25 SdPM25
  <int>    <dbl>  <dbl>
1  2002    16.1   13.9 
2  2022     8.43   7.64

5

# Summary statistics of the daily PM2.5 values
summary(newdat[["Daily Mean PM2.5 Concentration"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -6.70    4.50    7.60   10.05   12.20  302.50 
# Summary statistics of the daily AQI values
summary(newdat[["Daily AQI Value"]])
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    0.0    25.0    42.0    43.5    57.0   454.0 
# Boxplot of the daily AQI values
ggplot(data = newdat) +
  geom_boxplot(mapping = aes(x = `Daily AQI Value`))

# Scatter plot of daily PM2.5 concentration against daily AQI value
ggplot(data = newdat) +
  geom_point(
    mapping = aes(
      x = `Daily AQI Value`,
      y = `Daily Mean PM2.5 Concentration`
    )
  )

# Bar chart of the mean daily PM2.5 concentration per year.
# Year is an integer, so mapping it directly produces a continuous x axis
# with very wide bars and tick marks for years that have no data; factor()
# turns it into one discrete bar per year.
newdat %>%
  group_by(Year) %>%
  summarise(MeanPM25 = mean(`Daily Mean PM2.5 Concentration`)) %>%
  ggplot(aes(x = factor(Year), y = MeanPM25)) +
  geom_col() + # equivalent to geom_bar(stat = "identity")
  labs(x = "Year") # keep the axis title as "Year" instead of "factor(Year)"